package com.darrenchan.hadoop.mr.wc;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver that configures and submits the MapReduce job built from
 * {@link WCMapper} and {@link WCReducer} (presumably a word count, judging by
 * the package name; the mapper and reducer are defined elsewhere).
 *
 * <p>Usage: {@code WCRunner [inputPath [outputPath]]}. Both arguments are
 * optional; when omitted the job reads from {@code /home} and writes to
 * {@code output}.
 */
public class WCRunner {
    /** Input location used when no first command-line argument is supplied. */
    private static final String DEFAULT_INPUT_PATH = "/home";

    /**
     * Output location used when no second command-line argument is supplied.
     * Deliberately written without a trailing backslash: a backslash is only a
     * path separator on Windows, so elsewhere it would become part of the
     * directory name.
     */
    private static final String DEFAULT_OUTPUT_PATH = "output";

    /**
     * Builds the job, waits for it to finish while printing progress, and
     * terminates the JVM with exit status 0 on success or 1 on failure.
     *
     * @param args optional positional arguments: {@code args[0]} is the input
     *             path and {@code args[1]} is the output path; missing entries
     *             fall back to the defaults
     * @throws IOException            propagated from job creation, path setup or submission
     * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
     * @throws InterruptedException   propagated from {@code Job.waitForCompletion}
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Enable the two settings below to run against HDFS; without them the
        // local file system is used. Accessing HDFS requires that the chosen
        // user has the necessary permissions.
//        System.setProperty("HADOOP_USER_NAME", "hadoop");
//        conf.set("fs.defaultFS", "hdfs://hadoop000:8020");

        Job wcjob = Job.getInstance(conf);

        // Tell the framework which jar contains the classes used by this job.
        wcjob.setJarByClass(WCRunner.class);

        // Mapper and reducer implementations used by this job.
        wcjob.setMapperClass(WCMapper.class);
        wcjob.setReducerClass(WCReducer.class);

        // Key/value types of the reducer (final) output.
        wcjob.setOutputKeyClass(Text.class);
        wcjob.setOutputValueClass(LongWritable.class);

        // Key/value types of the mapper (intermediate) output.
        wcjob.setMapOutputKeyClass(Text.class);
        wcjob.setMapOutputValueClass(LongWritable.class);

        // Location of the input data to process.
        String inputLocation = argOrDefault(args, 0, DEFAULT_INPUT_PATH);
        FileInputFormat.setInputPaths(wcjob, new Path(inputLocation));

        // Location where the job results are written.
        String outputLocation = argOrDefault(args, 1, DEFAULT_OUTPUT_PATH);
        FileOutputFormat.setOutputPath(wcjob, new Path(outputLocation));

        // Submit the job, print its progress, and map the outcome to the exit status.
        System.exit(wcjob.waitForCompletion(true) ? 0 : 1);
    }

    /** Returns {@code args[index]} when that argument was supplied, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args != null && args.length > index && args[index] != null) {
            return args[index];
        }
        return fallback;
    }
}
